# Load needed packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(colorspace)

# Read the data
data <- read.csv("Interaction_data.csv", encoding = "latin1")

# Get unique species lists for each network
species_per_network <- data %>%
  group_by(Bioregion, Network_id) %>%
  summarise(
    Plants = list(unique(Plant_accepted_name)),
    Pollinators = list(unique(Pollinator_accepted_name)),
    .groups = "drop"
  )

# Function to calculate Jaccard similarity
jaccard_similarity <- function(set1, set2) {
  inter <- length(intersect(set1, set2))
  union <- length(union(set1, set2))
  if (union == 0) return(NA) else return(inter / union)
}

# Loop over each bioregion and create heatmap for each one
bioregions <- unique(species_per_network$Bioregion)
# For plants species 

for (bio in bioregions) {
  cat("Plotting Bioregion:", bio, "\n")
  
  sub_data <- species_per_network %>% filter(Bioregion == bio)
  nets <- sub_data$Network_id
  
  # Build similarity matrix
  mat <- matrix(NA, nrow = length(nets), ncol = length(nets),
                dimnames = list(nets, nets))
  for (i in seq_along(nets)) {
    for (j in seq_along(nets)) {
      mat[i,j] <- jaccard_similarity(sub_data$Plants[[i]], sub_data$Plants[[j]])
    }
  }
  
  # Turn into dataframe for ggplot
  mat_df <- as.data.frame(as.table(mat))
  
  # Plot heatmap
  p <- ggplot(mat_df, aes(Var1, Var2, fill = Freq)) +
    geom_tile(color = "white", size = 0.01)  +
    scale_fill_continuous_sequential(palette = "Viridis",) +
    theme_minimal() +
    labs(title = paste("Plant species similarity -", bio),
         x = "Network_id", y = "Network_id", fill = "Jaccard Index") +
    theme(axis.text.x = element_text(angle = 80, hjust = 1, size = 3),
          axis.text.y = element_text(size = 3))
  
  
  print(p)
}
## Plotting Bioregion: Alpine
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Plotting Bioregion: Atlantic

## Plotting Bioregion: Boreal

## Plotting Bioregion: Continental

## Plotting Bioregion: Mediterranean

## Plotting Bioregion: Pannonian

## Plotting Bioregion: Steppic

# For pollinators species 

for (bio in bioregions) {
  cat("Plotting Bioregion:", bio, "\n")
  
  sub_data <- species_per_network %>% filter(Bioregion == bio)
  nets <- sub_data$Network_id
  
  # Build similarity matrix
  mat <- matrix(NA, nrow = length(nets), ncol = length(nets),
                dimnames = list(nets, nets))
  for (i in seq_along(nets)) {
    for (j in seq_along(nets)) {
      mat[i,j] <- jaccard_similarity(sub_data$Pollinators[[i]], sub_data$Pollinators[[j]])
    }
  }
  
  # Turn into dataframe for ggplot
  mat_df <- as.data.frame(as.table(mat))
  
  # Plot heatmap
  p <- ggplot(mat_df, aes(Var1, Var2, fill = Freq)) +
    geom_tile(color = "white", size = 0.01) +
    scale_fill_continuous_sequential(palette = "Viridis") +
    theme_minimal() +
    labs(title = paste("Pollinators species similarity -", bio),
         x = "Network_id", y = "Network_id", fill = "Jaccard Index") +
    theme(axis.text.x = element_text(angle = 80, hjust = 1, size = 3),
          axis.text.y = element_text(size = 3))
  
  
  print(p)
}
## Plotting Bioregion: Alpine

## Plotting Bioregion: Atlantic

## Plotting Bioregion: Boreal

## Plotting Bioregion: Continental

## Plotting Bioregion: Mediterranean

## Plotting Bioregion: Pannonian

## Plotting Bioregion: Steppic